library(tidyverse)
── Attaching core tidyverse packages ─────────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.2 ✔ readr 2.1.4
✔ forcats 1.0.0 ✔ stringr 1.5.0
✔ ggplot2 3.4.2 ✔ tibble 3.2.1
✔ lubridate 1.9.2 ✔ tidyr 1.3.0
✔ purrr 1.0.1 ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
R Version
--- _
platform x86_64-apple-darwin20
arch x86_64
os darwin20
system x86_64, darwin20
status
major 4
minor 3.1
year 2023
month 06
day 16
svn rev 84548
language R
version.string R version 4.3.1 (2023-06-16)
nickname Beagle Scouts
Package Versions
---
tidyverse 2.0.0
models_format <- c(
"_cghr10" = "",
"_" = " & ",
"chatgpt3" = "ChatGPT-3.5",
"chatgpt4" = "ChatGPT-4",
"insilicova" = "InSilicoVA",
"interva5" = "InterVA-5"
)
stage_format <- c(
"is_" = "",
"recon" = "Reconciliation",
"agreed" = "Agreement",
"adj" = "Adjudication"
)
Functions for PCCC and CSMF Accuracy.
# Calc PCCC
calc_pccc <- function(actual, pred, k = 1, N = NULL) {
# Calc N num of causes if not known
N <- if (is.null(N)) length(unique(na.omit(c(actual, pred)))) else N
# Calc frac of deaths in top k causes
TP <- actual == pred
TP[is.na(TP) | is.null(TP)] <- FALSE # for no preds
C <- sum(TP) / length(actual)
# Calc PCCC
out <- (C - (k/N)) / (1 - (k/N))
return(out)
}
# Calc CSMF accuracy
calc_csmf_acc <- function(actual, pred) {
# Get all unique causes
causes <- unique(c(actual, pred))
# Get csmfs
cases <- length(actual)
csmf_true <- table(actual) / cases
csmf_pred <- table(pred) / cases
# Correct for missing causes in either actual or pred
csmf_true <- vapply(
causes,
function(x) if (x %in% names(csmf_true)) csmf_true[x] else 0,
FUN.VALUE = numeric(1)
)
csmf_pred <- vapply(
causes,
function(x) if (x %in% names(csmf_pred)) csmf_pred[x] else 0,
FUN.VALUE = numeric(1)
)
# Calc csmf max error
csmf_max_error <- 2 * (1 - min(csmf_true))
# Calc csmf acc
out <- 1 - (sum(abs(csmf_true - csmf_pred)) / csmf_max_error)
return(out)
}
# Bulk calc per model
calc_per_model <- function(actual, pred, func, name = "Metric", ...) {
# Prep data in long format grouped by model
out <- pred %>%
pivot_longer( # to long format
everything(),
names_to = "Model",
values_to = "prediction"
) %>% group_by(Model)
# Calc k if pccc as it is diff for model combos
if (identical(func, calc_pccc)) {
out <- out %>%
summarise(
!!name := func(
actual,
prediction,
k = str_count(unique(Model), "_"),
...
)
)
} else { # otherwise apply func
out <- out %>%
summarise(
!!name := func(
actual,
prediction,
...
)
)
}
# Format display and return
out <- out %>%
mutate(Model = str_replace_all(Model, models_format)) %>%
arrange(across({{name}}, desc))
return(out)
}
# Bulk calc per model based on age, physician coding stage, and cause of death
calc_by <- function(
df,
func,
name = "Metric",
by_age = TRUE,
by_stage = TRUE,
by_sex = TRUE,
by_cod = TRUE,
by_age_range = TRUE,
...
) {
out <- list()
# Calc overall metric
out[[name]] <- calc_per_model(
actual = df %>% pull(physician_cghr10),
pred = df %>% select(ends_with("_cghr10"), -physician_cghr10),
func = func,
...
) %>% rename(
{{ name }} := Metric
)
# Calc metric for va coding stage
if (by_stage) {
for (stage in c("is_agreed", "is_recon", "is_adj")) {
# Filter data for stage
df_filter <- df %>% filter(.[[stage]] == TRUE)
metric_name = paste0(
name,
" ",
str_replace_all(stage, stage_format)
)
# Calc metric for stage
out[[metric_name]] <- calc_per_model(
actual = df_filter %>%
pull(physician_cghr10),
pred = df_filter %>%
select(ends_with("_cghr10"), -physician_cghr10),
func = func,
...
) %>% rename(
{{ metric_name }} := Metric
)
}
}
# Calc pcc for each age group
if (by_age) {
for (age_group in c("adult", "child", "neo")) {
# Filter data for age
df_filter <- df %>% filter(age == age_group)
metric_name = paste0(
name,
" ",
str_to_title(age_group)
)
# Calc metric for age
out[[metric_name]] <- calc_per_model(
actual = df_filter %>%
pull(physician_cghr10),
pred = df_filter %>%
select(ends_with("_cghr10"), -physician_cghr10),
func = func,
...
) %>% rename(
{{ metric_name }} := Metric
)
# Calc metric for each stage within age group
for (stage in c("is_agreed", "is_recon", "is_adj")) {
# Filter data for stage within age group
df_filter <- df %>% filter(
.[[stage]] == TRUE & age == age_group
)
metric_name <- paste0(
name,
" ",
str_to_title(age_group),
" ",
str_replace_all(stage, stage_format)
)
# Calc metric for stage within age group
out[[metric_name]] <- calc_per_model(
actual = df_filter %>%
pull(physician_cghr10),
pred = df_filter %>%
select(ends_with("_cghr10"), -physician_cghr10),
func = func,
...
) %>% rename(
{{ metric_name }} := Metric
)
}
}
}
# Calc metric by sex for each age group
if (by_sex) {
# Calc metric by cod
for (a in c("adult", "child", "neo")) {
for (sx in c("Male", "Female")) {
# Filter data for cod
df_filter <- df %>% filter(
sex == sx & is_agreed == TRUE & age == a
)
metric_name <- sprintf(
"%s %s Sex Agree %s", name, str_to_title(a), sx
)
# Calc metric for cod
if (nrow(df_filter) > 0) {
out[[metric_name]] <- calc_per_model(
actual = df_filter %>%
pull(physician_cghr10),
pred = df_filter %>%
select(ends_with("_cghr10"), -physician_cghr10),
func = func,
...
) %>% rename(
{{ metric_name }} := Metric
)
}
}
}
}
# Calc metric by cause of death for each age group
if (by_cod) {
# Get unique causes of death
causes <- df %>%
select(ends_with("_cghr10")) %>%
pivot_longer(
everything(),
names_to = "column",
values_to = "cod"
) %>%
distinct(cod) %>%
filter(!is.na(cod)) %>%
pull(cod)
# Calc metric by cod
for (a in c("adult", "child", "neo")) {
for (cod in causes) {
# Filter data for cod
df_filter <- df %>% filter(
physician_cghr10 == cod & is_agreed == TRUE & age == a
)
metric_name <- sprintf(
"%s %s COD Agree %s", name, str_to_title(a), cod
)
# Calc metric for cod
if (nrow(df_filter) > 0) {
out[[metric_name]] <- calc_per_model(
actual = df_filter %>%
pull(physician_cghr10),
pred = df_filter %>%
select(ends_with("_cghr10"), -physician_cghr10),
func = func,
...
) %>% rename(
{{ metric_name }} := Metric
)
}
}
}
}
# Calc metric by age range
if (by_age_range) {
# Get unique age ranges
age_ranges <- df %>%
pull(age_range) %>%
unique
# Calc metric by age range
for (a in c("adult", "child", "neo")) {
for (ar in age_ranges) {
# Filter for age range
df_filter <- df %>% filter(
age_range == ar & is_agreed == TRUE & age == a
)
metric_name <- sprintf(
"%s %s Age Agree %s", name, str_to_title(a), ar
)
# Calc metric for cod
if (nrow(df_filter) > 0) {
out[[metric_name]] <- calc_per_model(
actual = df_filter %>%
pull(physician_cghr10),
pred = df_filter %>%
select(ends_with("_cghr10"), -physician_cghr10),
func = func,
...
) %>% rename(
{{ metric_name }} := Metric
)
}
}
}
}
# Combine all pccc metrics
out <- reduce(out,
function(x, y) left_join(x, y, by = "Model")
) %>%
arrange(across({{ name }}, desc))
return(out)
}
Load data from data folder.
raw_df <- read_csv("../data/healsl_rd1to2_cod_v1.csv")
Cases: 11920
Clean age_range labels to include only values and unit
of measure in titlecase.
df <- raw_df %>% mutate(
"age_range" = str_to_title(str_replace(
age_range,
"\\s*\\(.*\\)",
""
)),
"age_range" = if_else(
!str_detect(age_range, "Year|Years|Day|Days|month|Months|week|Weeks"),
paste0(age_range, " Years"),
age_range
)
)
df
Filter for cases that were coded by physicians.
df <- df %>%
filter(!is.na(physician_cghr10) & !is.null(physician_cghr10))
Physician Coded Cases: 11799
Create model combinations where if any of the models have the physician code, then set the combined model’s output for the case to be the physician code.
df <- df %>%
mutate( # Combine models
"chatgpt3_insilicova_cghr10" = if_else(
chatgpt3_cghr10 == physician_cghr10 |
insilicova_cghr10 == physician_cghr10,
physician_cghr10,
chatgpt3_cghr10
),
"chatgpt4_insilicova_cghr10" = if_else(
chatgpt4_cghr10 == physician_cghr10 |
insilicova_cghr10 == physician_cghr10,
physician_cghr10,
chatgpt4_cghr10
),
"chatgpt3_interva5_cghr10" = if_else(
chatgpt3_cghr10 == physician_cghr10 |
interva5_cghr10 == physician_cghr10,
physician_cghr10,
chatgpt3_cghr10
),
"chatgpt4_interva5_cghr10" = if_else(
chatgpt4_cghr10 == physician_cghr10 |
interva5_cghr10 == physician_cghr10,
physician_cghr10,
chatgpt4_cghr10
),
"chatgpt3_insilicova_interva5_cghr10" = if_else(
chatgpt3_cghr10 == physician_cghr10 |
insilicova_cghr10 == physician_cghr10 |
interva5_cghr10 == physician_cghr10,
physician_cghr10,
chatgpt3_cghr10
),
"chatgpt4_insilicova_interva5_cghr10" = if_else(
chatgpt4_cghr10 == physician_cghr10 |
insilicova_cghr10 == physician_cghr10 |
interva5_cghr10 == physician_cghr10,
physician_cghr10,
chatgpt4_cghr10
)
)
Display case counts for raw data and prepared data after combining models and filtering for physician coded cases.
cat("\nRaw Data\n--------\n\n")
Raw Data
--------
# Physician info
raw_physicians <- raw_df$physician_cghr10
cat(paste0(
"Cases (",
length(unique(na.omit(raw_physicians))), " CODs): ",
length(raw_physicians),
"\n"
))
Cases (29 CODs): 11920
# Models cases
raw_models <- raw_df %>% select(ends_with("_cghr10"), -physician_cghr10)
for (mcol in colnames(raw_models)) {
m <- raw_models[[mcol]]
cat(paste0(
str_replace_all(mcol, models_format), " Predicted Cases (",
length(unique(na.omit(m))), " CODs): ",
length(m) - sum(is.na(m) | is.null(m)),
"\n"
))
}
ChatGPT-3.5 Predicted Cases (29 CODs): 11785
ChatGPT-4 Predicted Cases (29 CODs): 11703
InSilicoVA Predicted Cases (29 CODs): 11548
InterVA-5 Predicted Cases (28 CODs): 11665
cat("\nPrepared Data\n--------------\n")
Prepared Data
--------------
# Physician cases
physicians <- df$physician_cghr10
cat(paste0(
"Physician Coded Cases (",
length(unique(na.omit(physicians))), " CODs): ",
length(physicians) - sum(is.na(physicians) | is.null(physicians)),
"\n"
))
Physician Coded Cases (29 CODs): 11799
# Models cases
models <- df %>% select(ends_with("_cghr10"), -physician_cghr10)
for (mcol in colnames(models)) {
m <- models[[mcol]]
cat(paste0(
str_replace_all(mcol, models_format), " Predicted Cases (",
length(unique(na.omit(m))), " CODs): ",
length(m) - sum(is.na(m) | is.null(m)),
"\n"
))
}
ChatGPT-3.5 Predicted Cases (29 CODs): 11707
ChatGPT-4 Predicted Cases (29 CODs): 11628
InSilicoVA Predicted Cases (29 CODs): 11460
InterVA-5 Predicted Cases (28 CODs): 11575
ChatGPT-3.5 & InSilicoVA Predicted Cases (29 CODs): 11598
ChatGPT-4 & InSilicoVA Predicted Cases (29 CODs): 11550
ChatGPT-3.5 & InterVA-5 Predicted Cases (29 CODs): 11639
ChatGPT-4 & InterVA-5 Predicted Cases (29 CODs): 11577
ChatGPT-3.5 & InSilicoVA & InterVA-5 Predicted Cases (29 CODs): 11618
ChatGPT-4 & InSilicoVA & InterVA-5 Predicted Cases (29 CODs): 11572
Calculate metrics from model outputs compared to physician codes.
metrics <- list()
Calculate Partial Chance Corrected Concordance (PCCC) to evaluate indivudal performance for each model by age and physician coding stage.
# Get num of unique causes
ncauses <- df %>%
select(ends_with("_cghr10")) %>%
pivot_longer(
everything(),
names_to = "column",
values_to = "cod"
) %>%
distinct(cod) %>%
filter(!is.na(cod)) %>%
pull(cod) %>%
length
# Calc pccc for age, stage, and cod
metrics$pccc <- calc_by(df, calc_pccc, "PCCC", N = ncauses)
metrics$pccc
Calculate Cause Specific Mortality Fraction (CSMF) Accuracy to evaluate population performance for each model by age and physician coding stage.
metrics$csmf_acc <- calc_by(
df,
calc_csmf_acc,
"CSMF Accuracy",
by_cod = FALSE
)
metrics$csmf_acc
Save calculated metrics to data folder.
out <- reduce(metrics,
function(x, y) left_join(x, y, by = "Model")
)
write_csv(out, "../data/healsl_rd1to2_metrics_v1.csv")
# Num and perc of agreed cases
nall <- nrow(df)
nagree <- nrow(df %>% filter(is_agreed == TRUE))
pagree <- (nagree / nall) * 100
# Create labels to axis titles
y_title <- paste0("Physician Agreed Records (n=", nagree, ", 100%)")
Display a plot of cases by age group separated by sex.
# Calc age and sex counts
nadult <- nrow(df %>% filter(age == "adult" & is_agreed == TRUE))
nchild <- nrow(df %>% filter(age == "child" & is_agreed == TRUE))
nneo <- nrow(df %>% filter(age == "neo" & is_agreed == TRUE))
# Calc age and sex perc
padult <- (nadult / nagree) * 100
pchild <- (nchild / nagree) * 100
pneo <- (nneo / nagree) * 100
# Create age with counts
age_remap <- c(
"Adult" = paste0("Adult, 12+ years\n(n=", nadult, ", ", round(padult), "%)"),
"Child" = paste0("Child, 28 days to 11 years\n(n=", nchild, ", ", round(pchild), "%)"),
"Neo" = paste0("Neonatal, <28 days\n(n=", nneo, ", ", round(pneo), "%)")
)
# Format the data for plotting
asdata <- df %>%
filter(is_agreed == TRUE) %>%
group_by(age, sex) %>%
summarize(count = n()) %>%
mutate(
age = str_to_title(age),
sex = paste0(
sex, " (n=", count, ", ",
round(count / case_when(
age == "Adult" ~ nadult,
age == "Child" ~ nchild,
age == "Neo" ~ nneo
) * 100), "%)"
)
) %>%
mutate( # re-order age
age = factor(age, levels = c(
"Adult",
"Child",
"Neo"
))
) %>%
mutate( # rename ages with counts
age = recode(age, !!!age_remap)
)
`summarise()` has grouped output by 'age'. You can override using the `.groups` argument.
# Plot the data
asplot <- asdata %>%
ggplot(aes(x = factor(age), y = count, fill = sex)) +
geom_bar(
stat = "identity",
fill = "white",
color = "black"
) +
geom_text(
aes(label = sex),
position = position_stack(vjust = 0.5),
size = 2.5,
color = "black"
) +
labs(
y = y_title,
x = element_blank(),
fill = "Sex"
) +
theme_minimal() +
theme(
panel.grid = element_blank(),
axis.title.x = element_text(margin = margin(t = 10)),
axis.title.y = element_text(margin = margin(r = 10))
)
# Save the plot
asplot
ggsave("../manuscript/fig-data-agesex.pdf", plot = asplot, dpi = 300)
Saving 7.29 x 4.51 in image
Display a plot for each age group of cases by age range.
for (a in c("adult", "child", "neo")) {
# Create label for age group
arows <- df %>% filter(is_agreed == TRUE & age == a) %>% nrow
alabel <- if (a == "neo") "Neonatal" else str_to_title(a)
atitle <- paste0("Physician Agreed ", alabel, " Records (n=", arows, ", 100%)")
# Format plot data
eardata <- df %>%
filter(is_agreed == TRUE & age == a) %>%
group_by(age_range) %>%
summarize(count = n()) %>%
separate(
age_range,
into = c("age_range_value", "age_range_unit"),
sep = " "
) %>%
separate(
age_range_value,
into = c("age_range_min", "age_range_max"),
sep = "-"
) %>%
mutate(
age_range_min = as.integer(age_range_min),
age_range_max = as.integer(age_range_max),
age_range_unit = factor(age_range_unit, levels = c(
"Weeks",
"Days",
"Months",
"Years"
)),
age_range_label = paste0(
age_range_min,
"-",
age_range_max,
" ",
age_range_unit,
"\n(n=", count, ", ", round((count / arows) * 100), "%)"
)
) %>%
arrange(age_range_unit, age_range_min, age_range_max) %>%
mutate(
age_range_label = factor(age_range_label, levels = unique(age_range_label))
)
# Plot the data
earplot <- eardata %>%
ggplot(aes(x = age_range_label, y = count)) +
geom_bar(
stat = "identity",
fill = "#1d1d1d",
color = "white",
width = 0.5
) +
labs(
y = atitle,
x = element_blank()
) +
theme_minimal() +
theme(
panel.grid = element_blank(),
axis.title.x = element_text(margin = margin(t = 10)),
axis.title.y = element_text(margin = margin(r = 10))
) +
coord_flip()
# Save the plot
print(earplot)
ggsave(
sprintf("../manuscript/fig-data-agerange-%s.pdf", a),
plot = earplot,
dpi = 300
)
}
Display a plot for each age group of cases by cause of death.
for (a in c("adult", "child", "neo")) {
# Create label for age group
arows <- df %>% filter(is_agreed == TRUE & age == a) %>% nrow
alabel <- if (a == "neo") "Neonatal" else str_to_title(a)
atitle <- paste0("Physician Agreed ", alabel, " Records (n=", arows, ", 100%)")
# Format plot data
ecoddata <- df %>%
filter(is_agreed == TRUE & age == a) %>%
group_by(physician_cghr10) %>%
summarize(count = n()) %>%
mutate(
cod_label = paste0(
if_else(
str_count(physician_cghr10, "\\s+") > 3,
str_replace(physician_cghr10, "(\\S+\\s+\\S+\\s+\\S+) ", "\\1\n"),
physician_cghr10
),
"\n(n=", count, ", ",
if_else(
((count / arows) * 100) < 1,
"<1",
as.character(round((count / arows) * 100))
), "%)"
)
) %>%
arrange(count) %>%
mutate(
cod_label = factor(cod_label, levels = unique(cod_label))
)
# Plot the data
ecodplot <- ecoddata %>%
ggplot(aes(x = cod_label, y = count)) +
geom_bar(
stat = "identity",
fill = "#1d1d1d",
color = "white",
width = 0.5
) +
labs(
y = atitle,
x = element_blank()
) +
theme_minimal() +
theme(
panel.grid = element_blank(),
axis.title.x = element_text(margin = margin(t = 10)),
axis.title.y = element_text(margin = margin(r = 10)),
axis.text.x = element_text(hjust = 1)
) +
coord_flip()
# Save the plot
print(ecodplot)
ggsave(
sprintf("../manuscript/fig-data-cod-%s.pdf", a),
plot = ecodplot,
dpi = 300,
width = if (a == "adult") 6 else NA,
height = if (a == "adult") 8 else NA
)
}
Display a plot of model performance for all records versus records where physicians agreed on the COD code.
# Calc stage counts
nall <- nrow(df)
nagree <- nrow(df %>% filter(is_agreed == TRUE))
nrecon <- nrow(df %>% filter(is_recon == TRUE))
nadj <- nrow(df %>% filter(is_adj == TRUE))
# Calc stage perc
pagree <- (nagree / nall) * 100
precon <- (nrecon / nall) * 100
padj <- (nadj / nall) * 100
# Create stage with counts
stage_remap <- c(
"All" = paste0("All\nRecords\n(n=", nall, ", 100%)"),
"Agreed" = paste0("Physician Agreed\nRecords\n(n=", nagree, ", ", round(pagree), "%)"),
"Reconciled" = paste0("Reconciled\n(n=", nrecon, ", ", round(precon), "%)"),
"Adjudicated" = paste0("Adjudicated\n(n=", nadj, ", ", round(padj), "%)")
)
# Prepare boxplot data
alldata <- out %>%
select(
Model,
`PCCC`,
`PCCC Agreement`,
`CSMF Accuracy`,
`CSMF Accuracy Agreement`
) %>%
rename(
All = `PCCC`,
Agreed = `PCCC Agreement`
) %>%
filter(!str_detect(Model, "&")) %>% # remove multi models
pivot_longer( # transform to long format
cols = -c(
Model,
`CSMF Accuracy`,
`CSMF Accuracy Agreement`
),
names_to = "Stage",
values_to = "PCCC"
) %>%
group_by(Stage) %>%
mutate( # Add min, mid, and max model names to label
"PCCC Min" = if_else(
PCCC <= min(PCCC) + 0.05,
paste0(
Model, "\n(",
if_else(round(PCCC, 2) != round(min(PCCC), 2), paste0(round(PCCC, 2), ", "), ""),
"CSMF=", round(if_else(Stage == "All", `CSMF Accuracy`, `CSMF Accuracy Agreement`), 2), ")"),
NA
),
"PCCC Max" = if_else(
PCCC >= max(PCCC) - 0.05,
paste0(
Model, "\n(",
if_else(round(PCCC, 2) != round(max(PCCC), 2), paste0(round(PCCC, 2), ", "), ""),
"CSMF=", round(if_else(Stage == "All", `CSMF Accuracy`, `CSMF Accuracy Agreement`), 2), ")"),
NA
),
"PCCC Mid" = if_else(
is.na(`PCCC Min`) & is.na(`PCCC Max`),
paste0(Model, "\n(", round(PCCC, 2), ", CSMF=", round(if_else(Stage == "All", `CSMF Accuracy`, `CSMF Accuracy Agreement`), 2), ")"),
NA
),
"PCCC Mid Value" = if_else(
is.na(`PCCC Min`) & is.na(`PCCC Max`),
PCCC,
NA
),
"PCCC Min" = if_else( # Combine into one row if close PCCC
`PCCC` == min(PCCC),
if_else(
sum(!is.na(`PCCC Min`)) > 1,
paste0(na.omit(`PCCC Min`), collapse = "\n"),
paste0(
str_replace(na.omit(`PCCC Min`), "\\s*\\(.*\\,\\s*", "\n("),
collapse = "\n"
)
),
NA
),
"PCCC Max" = if_else(
`PCCC` == max(PCCC),
if_else(
sum(!is.na(`PCCC Max`)) > 1,
paste0(na.omit(`PCCC Max`), collapse = "\n"),
paste0(
str_replace(na.omit(`PCCC Max`), "\\s*\\(.*\\,\\s*", "\n("),
collapse = "\n"
)
),
NA
)
) %>%
mutate( # re-order stage
Stage = factor(Stage, levels = c(
"Agreed",
"All"
))
) %>%
mutate( # rename stages with counts
Stage = recode(Stage, !!!stage_remap)
)
# Plot boxplot
allplot <- alldata %>%
ggplot(aes(x = Stage, y = PCCC)) +
geom_boxplot(
linewidth = 0.5,
width = 0.25
) +
geom_point(
aes(y = `PCCC Mid Value`),
shape = 1,
size = 1.5,
fill = "white",
color = "darkgray",
alpha = 0.8,
position = position_nudge(x = -0.3)
) +
geom_text(
aes(label = `PCCC Min`),
color = "#4d4d4d",
size = 2,
hjust = 1,
position = position_nudge(y = -0.055)
) +
geom_text(
aes(label = `PCCC Max`),
color = "#4d4d4d",
size = 2,
hjust = 0,
position = position_nudge(y = 0.055)
) +
geom_text(
aes(label = `PCCC Mid`),
color = "#4d4d4d",
size = 2,
hjust = 1,
position = position_nudge(y = -0.015, x = -0.3)
) +
stat_summary( # min pccc txt on boxplot
geom = "text",
fun = min,
aes(label = sprintf("%1.2f", after_stat(y))),
position = position_nudge(y = -0.025),
size = 3
) +
stat_summary( # max pccc txt on boxplot
geom = "text",
fun = max,
aes(label = sprintf("%1.2f", after_stat(y))),
position = position_nudge(y = 0.025),
size = 3
) +
labs(
x = element_blank(),
y = "PCCC (0=Low, 1=High)"
) +
ylim(0.2, 0.85) +
coord_flip() +
theme_minimal() +
theme(
panel.grid = element_blank(),
axis.line = element_line(color = "black"),
axis.title.x = element_text(margin = margin(t = 10)),
axis.title.y = element_text(margin = margin(r = 10)),
axis.line.x = element_line(
arrow = grid::arrow(length = unit(0.2, "cm"), ends = "both")
),
axis.line.y = element_blank()
)
# Save the plot
allplot
ggsave("../manuscript/fig-perf-allvsagree.pdf", plot = allplot, dpi = 300)
Saving 6 x 2.5 in image
Display a plot of performance for physician agreed records by age group.
# Calc age counts
nagree <- nrow(df %>% filter(is_agreed == TRUE))
nadult <- nrow(df %>% filter(age == "adult" & is_agreed == TRUE))
nchild <- nrow(df %>% filter(age == "child" & is_agreed == TRUE))
nneo <- nrow(df %>% filter(age == "neo" & is_agreed == TRUE))
# Calc age perc
padult <- (nadult / nagree) * 100
pchild <- (nchild / nagree) * 100
pneo <- (nneo / nagree) * 100
# Create age with counts
age_remap <- c(
"Adult" = paste0("Adult\n12+ years\n(n=", nadult, ", ", round(padult), "%)"),
"Child" = paste0("Child\n28 days to 11 years\n(n=", nchild, ", ", round(pchild), "%)"),
"Neonatal" = paste0("Neonatal\n<28 days\n(n=", nneo, ", ", round(pneo), "%)")
)
# Prepare boxplot data
agedata <- out %>%
select(
Model,
`PCCC Adult Agreement`,
`PCCC Child Agreement`,
`PCCC Neo Agreement`,
`CSMF Accuracy Adult Agreement`,
`CSMF Accuracy Child Agreement`,
`CSMF Accuracy Neo Agreement`
) %>%
rename(
Adult = `PCCC Adult Agreement`,
Child = `PCCC Child Agreement`,
Neonatal = `PCCC Neo Agreement`
) %>%
filter(!str_detect(Model, "&")) %>% # remove multi models
pivot_longer( # transform to long format
cols = -c(
Model,
`CSMF Accuracy Adult Agreement`,
`CSMF Accuracy Child Agreement`,
`CSMF Accuracy Neo Agreement`
),
names_to = "Age Group",
values_to = "PCCC"
) %>%
group_by(`Age Group`) %>%
mutate( # Add min, mid, and max model names to label
"PCCC Min" = if_else(
PCCC <= min(PCCC) + 0.05,
paste0(
Model, "\n(",
if_else(round(PCCC, 2) != round(min(PCCC), 2), paste0(round(PCCC, 2), ", "), ""),
"CSMF=", round(case_when(
`Age Group` == "Adult" ~ `CSMF Accuracy Adult Agreement`,
`Age Group` == "Child" ~ `CSMF Accuracy Child Agreement`,
`Age Group` == "Neonatal" ~ `CSMF Accuracy Neo Agreement`
),2), ")"),
NA
),
"PCCC Max" = if_else(
PCCC >= max(PCCC) - 0.05,
paste0(
Model, "\n(",
if_else(round(PCCC, 2) != round(max(PCCC), 2), paste0(round(PCCC, 2), ", "), ""),
"CSMF=", round(case_when(
`Age Group` == "Adult" ~ `CSMF Accuracy Adult Agreement`,
`Age Group` == "Child" ~ `CSMF Accuracy Child Agreement`,
`Age Group` == "Neonatal" ~ `CSMF Accuracy Neo Agreement`
),2), ")"),
NA
),
"PCCC Mid" = if_else(
is.na(`PCCC Min`) & is.na(`PCCC Max`),
paste0(
Model, "\n(", round(PCCC, 2),
", CSMF=", round(case_when(
`Age Group` == "Adult" ~ `CSMF Accuracy Adult Agreement`,
`Age Group` == "Child" ~ `CSMF Accuracy Child Agreement`,
`Age Group` == "Neonatal" ~ `CSMF Accuracy Neo Agreement`
),2), ")"),
NA
),
"PCCC Mid Value" = if_else(
is.na(`PCCC Min`) & is.na(`PCCC Max`),
PCCC,
NA
),
"PCCC Min" = if_else( # Combine into one row if close PCCC
`PCCC` == min(PCCC),
if_else(
sum(!is.na(`PCCC Min`)) > 1,
paste0(na.omit(`PCCC Min`), collapse = "\n"),
paste0(
str_replace(na.omit(`PCCC Min`), "\\s*\\(.*\\,\\s*", "\n("),
collapse = "\n"
)
),
NA
),
"PCCC Max" = if_else(
`PCCC` == max(PCCC),
if_else(
sum(!is.na(`PCCC Max`)) > 1,
paste0(na.omit(`PCCC Max`), collapse = "\n"),
paste0(
str_replace(na.omit(`PCCC Max`), "\\s*\\(.*\\,\\s*", "\n("),
collapse = "\n"
)
),
NA
)
) %>%
mutate( # re-order age
"Age Group" = factor(`Age Group`, levels = c(
"Neonatal",
"Child",
"Adult"
))
) %>%
mutate( # rename ages with counts
"Age Group" = recode(`Age Group`, !!!age_remap)
)
# Plot boxplot
ageplot <- agedata %>%
ggplot(aes(x = `Age Group`, y = PCCC)) +
geom_boxplot(
linewidth = 0.5,
width = 0.25
) +
geom_point(
aes(y = `PCCC Mid Value`),
shape = 1,
size = 1.5,
fill = "white",
color = "darkgray",
alpha = 0.8,
position = position_nudge(x = -0.3)
) +
geom_text(
aes(label = `PCCC Min`),
color = "#4d4d4d",
size = 2,
hjust = 1,
position = position_nudge(y = -0.055)
) +
geom_text(
aes(label = `PCCC Max`),
color = "#4d4d4d",
size = 2,
hjust = 0,
position = position_nudge(y = 0.055)
) +
geom_text(
aes(label = `PCCC Mid`),
color = "#4d4d4d",
size = 2,
hjust = 1,
position = position_nudge(y = -0.015, x = -0.3)
) +
stat_summary( # min pccc txt on boxplot
geom = "text",
fun = min,
aes(label = sprintf("%1.2f", after_stat(y))),
position = position_nudge(y = -0.025),
size = 3
) +
stat_summary( # max pccc txt on boxplot
geom = "text",
fun = max,
aes(label = sprintf("%1.2f", after_stat(y))),
position = position_nudge(y = 0.025),
size = 3
) +
labs(
x = paste0("Physician Agreed Records (n=", nagree, ", 100%)"),
y = "PCCC (0=Low, 1=High)"
) +
ylim(0.25, 0.9) +
coord_flip() +
theme_minimal() +
theme(
panel.grid = element_blank(),
plot.margin = margin(t = 16, b = 12, l = 12, r = 12),
axis.line = element_line(color = "black"),
axis.title.x = element_text(margin = margin(t = 10)),
axis.title.y = element_text(margin = margin(r = 10)),
axis.line.x = element_line(
arrow = grid::arrow(length = unit(0.2, "cm"), ends = "both")
),
axis.line.y = element_blank()
)
# Save the plot
ageplot
ggsave("../manuscript/fig-perf-agegroup.pdf", plot = ageplot, dpi = 300)
Saving 6 x 3.5 in image
Display a plot for each age group of performance for physician agreed records by sex.
for (a in c("adult", "child", "neo")) {
# Create label and ref for age group
arows <- df %>% filter(is_agreed == TRUE & age == a) %>% nrow
alabel <- if (a == "neo") "Neonatal" else str_to_title(a)
atitle <- paste0("Physician Agreed\n", alabel, " Records\n(n=", arows, ", 100%)")
aref <- str_to_title(a)
# Get unique sex columns
sex_col <- out %>% select(starts_with(sprintf(
"PCCC %s Sex Agree ",
aref
))) %>%
names
# Create sex with counts
sex_remap <- list()
sex_counts <- list()
for (sx_col in sex_col) {
# Get sex without prefix
sx <- gsub(sprintf("PCCC %s Sex Agree ", aref), "", sx_col)
# Calc num and perc cases for age range
nsex <- df %>% filter(sex == sx & is_agreed == TRUE & age == a) %>% nrow
psex <- (nsex / arows) * 100
psex_label <- if (round(psex) >= 1) round(psex) else "<1"
# Add label for age ranges
sex_remap[[sx]] <- paste0(
sprintf("%s %s", alabel, sx),
"\n(n=", nsex, ", ", psex_label, "%)"
)
# Store age range count data
sex_counts[[sx]] <- nsex
}
# Prepare boxplot data
sxdata <- out %>%
select(
Model,
starts_with(sprintf("PCCC %s Sex Agree", aref))
) %>%
rename_at(
vars(-Model),
~gsub(sprintf("PCCC %s Sex Agree ", aref), "", .)
) %>%
filter(!str_detect(Model, "&")) %>% # remove multi models
pivot_longer( # transform to long format
cols = -Model,
names_to = "Sex",
values_to = "PCCC"
) %>%
mutate( # rename cod with counts
"Sex" = recode(Sex, !!!sex_remap)
) %>%
group_by(Sex) %>%
mutate( # Make values less than 0 equal to 0
PCCC = if_else(PCCC <= 0, 0, PCCC)
) %>%
mutate( # Add min, mid, and max model names to label
"PCCC Min" = if_else(
PCCC <= min(PCCC) + 0.05,
paste0(
Model,
if_else(round(PCCC, 2) != round(min(PCCC), 2), paste0(" (", round(PCCC, 2), ")"), "")
),
NA
),
"PCCC Max" = if_else(
PCCC >= max(PCCC) - 0.05,
paste0(
Model,
if_else(round(PCCC, 2) != round(max(PCCC), 2), paste0(" (", round(PCCC, 2), ")"), "")
),
NA
),
"PCCC Mid" = if_else(
is.na(`PCCC Min`) & is.na(`PCCC Max`),
paste0(Model, "\n(", round(PCCC, 2), ")"),
NA
),
"PCCC Mid Value" = if_else(
is.na(`PCCC Min`) & is.na(`PCCC Max`),
PCCC,
NA
),
"PCCC Min" = if_else( # Combine into one row if close PCCC
`PCCC` == min(PCCC),
if_else(
sum(!is.na(`PCCC Min`)) > 1,
paste0(na.omit(`PCCC Min`), collapse = "\n"),
paste0(
str_replace(na.omit(`PCCC Min`), "\\s*\\(.*\\)", ""),
collapse = "\n"
)
),
NA
),
"PCCC Max" = if_else(
`PCCC` == max(PCCC),
if_else(
sum(!is.na(`PCCC Max`)) > 1,
paste0(na.omit(`PCCC Max`), collapse = "\n"),
paste0(str_replace(na.omit(`PCCC Max`), "\\s*\\(.*\\)", ""),
collapse = "\n")
),
NA
)
)
# Create cod order based on max pccc
sxorder <- sxdata %>%
group_by(Sex) %>%
summarise("PCCC Max Value" = max(PCCC, na.rm = TRUE)) %>%
select(Sex, `PCCC Max Value`) %>%
arrange(desc(`PCCC Max Value`)) %>%
pull(Sex)
sxdata$Sex <- factor(sxdata$Sex, levels = rev(sxorder))
# Plot boxplot
sxplot <- sxdata %>%
ggplot(aes(x = Sex, y = PCCC)) +
geom_boxplot(
linewidth = 0.5,
width = 0.25
) +
geom_point(
aes(y = `PCCC Mid Value`),
shape = 1,
size = 1.5,
fill = "white",
color = "darkgray",
alpha = 0.8,
position = position_nudge(x = -0.35)
) +
geom_text(
aes(label = `PCCC Min`),
color = "#4d4d4d",
size = 2.5,
hjust = 1,
position = position_nudge(y = -0.09)
) +
geom_text(
aes(label = `PCCC Mid`),
color = "#4d4d4d",
size = 2,
hjust = 1,
position = position_nudge(y = -0.025, x = -0.35)
) +
geom_text(
aes(label = `PCCC Max`),
color = "#4d4d4d",
size = 2.5,
hjust = 0,
position = position_nudge(y = 0.09)
) +
stat_summary( # min pccc txt on boxplot
geom = "text",
fun = min,
aes(label = sprintf("%1.2f", after_stat(y))),
position = position_nudge(y = -0.04),
size = 3
) +
stat_summary( # max pccc txt on boxplot
geom = "text",
fun = max,
aes(label = sprintf("%1.2f", after_stat(y))),
position = position_nudge(y = 0.04),
size = 3
) +
labs(
x = atitle,
y = "PCCC (0=Low, 1=High)"
) +
ylim(0.1, 1) +
coord_flip() +
theme_minimal() +
theme(
panel.grid = element_blank(),
axis.line = element_blank(),
axis.line.y = element_blank(),
axis.title.x = element_text(margin = margin(t = 10)),
axis.title.y = element_text(margin = margin(r = 10)),
axis.line.x = element_line(
color = "black",
arrow = grid::arrow(length = unit(0.2, "cm"), ends = "both")
)
)
# Save the plot
print(sxplot)
ggsave(
sprintf("../manuscript/fig-perf-sex-%s.pdf", a),
plot = sxplot,
dpi = 300
)
}
Display a plot for each age group of performance for physician agreed records by age ranges.
for (a in c("adult", "child", "neo")) {
# Create label and ref for age group
arows <- df %>% filter(is_agreed == TRUE & age == a) %>% nrow
alabel <- if (a == "neo") "Neonatal" else str_to_title(a)
atitle <- if (a == "neo") {
paste0("Physician Agreed\n", alabel, " Records\n(n=", arows, ", 100%)")
} else {
paste0("Physician Agreed ", alabel, " Records (n=", arows, ", 100%)")
}
aref <- str_to_title(a)
# Get unique age range columns
arange_col <- out %>% select(starts_with(sprintf(
"PCCC %s Age Agree ",
aref
))) %>%
names
# Create age range with counts
arange_remap <- list()
arange_counts <- list()
for (ar_col in arange_col) {
# Get cod without prefix
ar <- gsub(sprintf("PCCC %s Age Agree ", aref), "", ar_col)
# Calc num and perc cases for age range
narange <- df %>% filter(age_range == ar & is_agreed == TRUE & age == a) %>% nrow
parange <- (narange / arows) * 100
parange_label <- if (round(parange) >= 1) round(parange) else "<1"
# Add label for age ranges
arange_remap[[ar]] <- paste0(
ar,
"\n(n=", narange, ", ", parange_label, "%)"
)
# Store age range count data
arange_counts[[ar]] <- narange
}
# Prepare boxplot data
ardata <- out %>%
select(
Model,
starts_with(sprintf("PCCC %s Age Agree", aref))
) %>%
rename_at(
vars(-Model),
~gsub(sprintf("PCCC %s Age Agree ", aref), "", .)
) %>%
filter(!str_detect(Model, "&")) %>% # remove multi models
pivot_longer( # transform to long format
cols = -Model,
names_to = "Age Range",
values_to = "PCCC"
) %>%
mutate( # rename cod with counts
"Age Range" = recode(`Age Range`, !!!arange_remap)
) %>%
group_by(`Age Range`) %>%
mutate( # Make values less than 0 equal to 0
PCCC = if_else(PCCC <= 0, 0, PCCC)
) %>%
mutate( # Add min, mid, and max model names to label
"PCCC Min" = if_else(
PCCC <= min(PCCC) + 0.05,
paste0(
Model,
if_else(round(PCCC, 2) != round(min(PCCC), 2), paste0(" (", round(PCCC, 2), ")"), "")
),
NA
),
"PCCC Max" = if_else(
PCCC >= max(PCCC) - 0.05,
paste0(
Model,
if_else(round(PCCC, 2) != round(max(PCCC), 2), paste0(" (", round(PCCC, 2), ")"), "")
),
NA
),
"PCCC Mid" = if_else(
is.na(`PCCC Min`) & is.na(`PCCC Max`),
paste0(Model, "\n(", round(PCCC, 2), ")"),
NA
),
"PCCC Min" = if_else( # Combine into one row if close PCCC
`PCCC` == min(PCCC),
if_else(
sum(!is.na(`PCCC Min`)) > 1,
paste0(na.omit(`PCCC Min`), collapse = "\n"),
paste0(
str_replace(na.omit(`PCCC Min`), "\\s*\\(.*\\)", ""),
collapse = "\n"
)
),
NA
),
"PCCC Max" = if_else(
`PCCC` == max(PCCC),
if_else(
sum(!is.na(`PCCC Max`)) > 1,
paste0(na.omit(`PCCC Max`), collapse = "\n"),
paste0(str_replace(na.omit(`PCCC Max`), "\\s*\\(.*\\)", ""),
collapse = "\n")
),
NA
)
)
# Create cod order based on max pccc
arorder <- ardata %>%
group_by(`Age Range`) %>%
summarise("PCCC Max Value" = max(PCCC, na.rm = TRUE)) %>%
select(`Age Range`, `PCCC Max Value`) %>%
arrange(desc(`PCCC Max Value`)) %>%
pull(`Age Range`)
ardata$`Age Range` <- factor(ardata$`Age Range`, levels = rev(arorder))
# Plot boxplot
arplot <- ardata %>%
ggplot(aes(x = `Age Range`, y = PCCC)) +
geom_boxplot(
linewidth = 0.5,
width = 0.25
) +
geom_text(
aes(label = `PCCC Min`),
color = "#4d4d4d",
size = 2.5,
hjust = 1,
position = position_nudge(y = -0.08)
) +
geom_text(
aes(label = `PCCC Max`),
color = "#4d4d4d",
size = 2.5,
hjust = 0,
position = position_nudge(y = 0.08)
) +
stat_summary( # min pccc txt on boxplot
geom = "text",
fun = min,
aes(label = sprintf("%1.2f", after_stat(y))),
position = position_nudge(y = -0.04),
size = 3
) +
stat_summary( # max pccc txt on boxplot
geom = "text",
fun = max,
aes(label = sprintf("%1.2f", after_stat(y))),
position = position_nudge(y = 0.04),
size = 3
) +
labs(
x = atitle,
y = "PCCC (0=Low, 1=High)"
) +
ylim(0.1, 1) +
coord_flip() +
theme_minimal() +
theme(
panel.grid = element_blank(),
axis.line = element_blank(),
axis.line.y = element_blank(),
axis.title.x = element_text(margin = margin(t = 10)),
axis.title.y = element_text(margin = margin(r = 10)),
axis.line.x = element_line(
color = "black",
arrow = grid::arrow(length = unit(0.2, "cm"), ends = "both")
)
)
# Save the plot
print(arplot)
ggsave(
sprintf("../manuscript/fig-perf-agerange-%s.pdf", a),
plot = arplot,
dpi = 300,
width = if (a == "adult") 6 else if (a == "neo") 6 else NA,
height = if (a == "adult") 8 else if (a == "neo") 2.5 else NA
)
}
Plot model performance for physician agreed records by cause of death category.
for (a in c("adult", "child", "neo")) {
# Create label for age group
arows <- df %>% filter(is_agreed == TRUE & age == a) %>% nrow
alabel <- if (a == "neo") "Neonatal" else str_to_title(a)
atitle <- paste0("Physician Agreed ", alabel, " Records (n=", arows, ", 100%)")
aref <- str_to_title(a)
# Get unique cause columns
causes_col <- out %>% select(starts_with(sprintf(
"PCCC %s COD Agree ", aref
))) %>% names
# Create cod with counts
cod_remap <- list()
cod_counts <- list()
for (cod_col in causes_col) {
# Get cod without prefix
cod <- gsub(sprintf("PCCC %s COD Agree ", aref), "", cod_col)
# Calculate num and perc cases for cod
ncod <- df %>% filter(physician_cghr10 == cod & is_agreed == TRUE & age == a) %>% nrow
pcod <- (ncod / nagree) * 100
pcod_label <- if (round(pcod) >= 1) round(pcod) else "<1"
# Break cod into newlines if more than 3 words
if (str_count(cod, "\\s+") > 3) {
cod_label <- str_replace(cod, "(\\S+\\s+\\S+\\s+\\S+) ", "\\1\n")
} else {
cod_label <- cod
}
# Add label for cod
cod_remap[[cod]] <- paste0(cod_label, "\n(n=", ncod, ", ", pcod_label, "%)")
# Store cod count data
cod_counts[[cod]] <- ncod
}
# Prepare boxplot data
coddata <- out %>%
select(
Model,
starts_with(sprintf("PCCC %s COD Agree ", aref))
) %>%
rename_at(
vars(-Model),
~gsub(sprintf("PCCC %s COD Agree ", aref), "", .)
) %>%
filter(!str_detect(Model, "&")) %>% # remove multi models
pivot_longer( # transform to long format
cols = -Model,
names_to = "COD",
values_to = "PCCC"
) %>%
mutate( # rename cod with counts
"COD" = recode(`COD`, !!!cod_remap)
) %>%
group_by(`COD`) %>%
mutate( # Make values less than 0 equal to 0
PCCC = if_else(PCCC <= 0, 0, PCCC)
) %>%
mutate( # Add min, mid, and max model names to label
"PCCC Min" = if_else(
PCCC <= min(PCCC) + 0.05,
paste0(
Model,
if_else(round(PCCC, 2) != round(min(PCCC), 2), paste0(" (", round(PCCC, 2), ")"), "")
),
NA
),
"PCCC Max" = if_else(
PCCC >= max(PCCC) - 0.05,
paste0(
Model,
if_else(round(PCCC, 2) != round(max(PCCC), 2), paste0(" (", round(PCCC, 2), ")"), "")
),
NA
),
"PCCC Mid" = if_else(
is.na(`PCCC Min`) & is.na(`PCCC Max`),
paste0(Model, "\n(", round(PCCC, 2), ")"),
NA
),
"PCCC Min" = if_else( # Combine into one row if close PCCC
`PCCC` == min(PCCC),
if_else(
sum(!is.na(`PCCC Min`)) > 1,
paste0(na.omit(`PCCC Min`), collapse = "\n"),
paste0(
str_replace(na.omit(`PCCC Min`), "\\s*\\(.*\\)", ""),
collapse = "\n"
)
),
NA
),
"PCCC Max" = if_else(
`PCCC` == max(PCCC),
if_else(
sum(!is.na(`PCCC Max`)) > 1,
paste0(na.omit(`PCCC Max`), collapse = "\n"),
paste0(str_replace(na.omit(`PCCC Max`), "\\s*\\(.*\\)", ""),
collapse = "\n")
),
NA
)
)
# Create cod order based on max pccc
codorder <- coddata %>%
group_by(COD) %>%
summarise("PCCC Max Value" = max(PCCC, na.rm = TRUE)) %>%
select(COD, `PCCC Max Value`) %>%
arrange(desc(`PCCC Max Value`)) %>%
pull(COD)
coddata$COD <- factor(coddata$COD, levels = rev(codorder))
# Plot boxplot
codplot <- coddata %>%
ggplot(aes(x = COD, y = PCCC)) +
geom_boxplot(
linewidth = 0.5,
width = 0.25
) +
geom_text(
aes(label = `PCCC Min`),
color = "#4d4d4d",
size = 2.5,
hjust = 1,
position = position_nudge(y = -0.12)
) +
geom_text(
aes(label = `PCCC Max`),
color = "#4d4d4d",
size = 2.5,
hjust = 0,
position = position_nudge(y = 0.12)
) +
stat_summary( # min pccc txt on boxplot
geom = "text",
fun = min,
aes(label = sprintf("%1.2f", after_stat(y))),
position = position_nudge(y = -0.06),
size = 3
) +
stat_summary( # max pccc txt on boxplot
geom = "text",
fun = max,
aes(label = sprintf("%1.2f", after_stat(y))),
position = position_nudge(y = 0.06),
size = 3
) +
labs(
x = atitle,
y = "PCCC (0=Low, 1=High)"
) +
ylim(-0.3, 1.3) +
coord_flip() +
theme_minimal() +
theme(
panel.grid = element_blank(),
axis.line = element_blank(),
axis.line.y = element_blank(),
axis.title.x = element_text(margin = margin(t = 10)),
axis.title.y = element_text(margin = margin(r = 10)),
axis.line.x = element_line(
color = "black",
arrow = grid::arrow(length = unit(0.2, "cm"), ends = "both")
)
)
# Save the plot
print(codplot)
ggsave(
sprintf("../manuscript/fig-perf-cod-%s.pdf", a),
plot = codplot,
dpi = 300,
width = if (a == "adult") 8 else NA,
height = if (a == "adult") 10 else NA
)
}